{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {
    "colab_type": "text",
    "id": "view-in-github"
   },
   "source": [
    "<a href=\"https://colab.research.google.com/github/CoreTheGreat/HBPU-Machine-Learning-Course/blob/main/ML_Chapter3_Classification.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "lPboLx_o0UxI"
   },
   "source": [
    "# 第三章：分类\n",
    "湖北理工学院《机器学习》课程资料\n",
    "\n",
    "作者：李辉楚吴\n",
    "\n",
    "笔记内容概述: 绘制混淆矩阵 Confusion Matrix\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "Ifpm7Sql4U09"
   },
   "source": [
    "## Step 1: 数据准备\n",
    "\n",
    "使用make_moons生成虚拟数据构建分类任务"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 1000
    },
    "id": "sM-ziKb94S9_",
    "outputId": "9a8bd59b-1ed4-47e3-e118-cee1849a610a"
   },
   "outputs": [],
   "source": [
    "from sklearn.datasets import make_moons\n",
    "from sklearn.model_selection import train_test_split\n",
    "import matplotlib.pyplot as plt\n",
    "\n",
    "label_size = 18 # Label size\n",
    "ticklabel_size = 14 # Tick label size\n",
    "\n",
    "# Generate moon-shaped data\n",
    "X, Y = make_moons(n_samples=1000, noise=0.25, random_state=42)\n",
    "\n",
    "# Split X and Y into training and testing sets\n",
    "X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.3, random_state=42)\n",
    "\n",
    "# Plot the data\n",
    "fig, ax = plt.subplots(figsize=(10, 8))\n",
    "ax.scatter(X_train[y_train == 1, 0], X_train[y_train == 1, 1], color='red', label='Positive - Train')\n",
    "ax.scatter(X_test[y_test == 1, 0], X_test[y_test == 1, 1], color='orange', label='Positive - Test')\n",
    "ax.scatter(X_train[y_train == 0, 0], X_train[y_train == 0, 1], color='blue', label='Negative - Train')\n",
    "ax.scatter(X_test[y_test == 0, 0], X_test[y_test == 0, 1], color='green', label='Negative - Test')\n",
    "ax.set_xlabel('Feature 1', fontsize=label_size)\n",
    "ax.set_ylabel('Feature 2', fontsize=label_size)\n",
    "\n",
    "ax.tick_params(axis='both', which='major', labelsize=ticklabel_size)\n",
    "\n",
    "# Set legend fontsize\n",
    "plt.legend(prop={'size': 14})\n",
    "\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Step 2: 使用逻辑回归、SVM、决策树和随机森林模型进行分类"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "id": "BWPSDyOq5qOO",
    "outputId": "9aed06f2-67a9-4a6f-f00c-d91554c3a260"
   },
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "from sklearn.linear_model import LogisticRegression\n",
    "from sklearn.svm import SVC\n",
    "from sklearn.tree import DecisionTreeClassifier\n",
    "from sklearn.ensemble import RandomForestClassifier\n",
    "\n",
    "# Model Definition\n",
    "mdl_lr = LogisticRegression() # Logistic Regression\n",
    "mdl_svm = SVC() # Support Vector Machine\n",
    "mdl_dt = DecisionTreeClassifier() # Decision Tree\n",
    "mdl_rf = RandomForestClassifier(n_estimators=100) # Random Forests\n",
    "\n",
    "# Model Training\n",
    "mdl_lr.fit(X_train, y_train) # Logistic Regression\n",
    "print('Logistic regression model trained successfully...')\n",
    "\n",
    "mdl_svm.fit(X_train, y_train) # Support Vector machine\n",
    "print('Support vector machine trained successfully...')\n",
    "\n",
    "mdl_dt.fit(X_train, y_train) # Decision Tree\n",
    "print('Decision tree trained successfully...')\n",
    "\n",
    "mdl_rf.fit(X_train, y_train) # Random Forest\n",
    "print('Random forests trained successfully...')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "BtGNbqlSKnC-"
   },
   "source": [
    "## Step 3: 混淆矩阵 Confusion Matrix"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import seaborn as sns # To display confusion matrix\n",
    "    \n",
    "def confusion_matrix(y_pred, y):\n",
    "    '''\n",
    "    y_pred - predict classes\n",
    "    y - actual classes\n",
    "    '''\n",
    "    # Get the number of classes\n",
    "    n_classes = len(np.unique(y))\n",
    "    \n",
    "    # Initialize the confusion matrix with zeros\n",
    "    cm = np.zeros((n_classes, n_classes))\n",
    "    \n",
    "    # Fill the confusion matrix\n",
    "    for pred, actual in zip(y_pred, y):\n",
    "        cm[pred][actual] += 1\n",
    "    \n",
    "    return cm\n",
    "\n",
    "def cm_disp(cm, title='Confusion Matrix', save_fig=False):\n",
    "    ''' \n",
    "    Display confusion matrix\n",
    "    '''\n",
    "    global label_size, ticklabel_size\n",
    "    \n",
    "    # Create a figure and axis\n",
    "    fig, ax = plt.subplots(figsize=(6, 4))\n",
    "    \n",
    "    # Use seaborn to create a heatmap without color bar\n",
    "    sns.heatmap(cm, annot=True, fmt='.0f', cmap='Blues', ax=ax, annot_kws={'size': ticklabel_size}, cbar=False)\n",
    "    \n",
    "    # Set ticklabels\n",
    "    ax.set_xticklabels(['Positive', 'Negative'])\n",
    "    ax.set_yticklabels(['Positive', 'Negative'], rotation=90)\n",
    "    \n",
    "    # Set labels and title with custom font sizes\n",
    "    ax.set_xlabel('Predicted labels', fontsize=label_size)\n",
    "    ax.set_ylabel('True labels', fontsize=label_size)\n",
    "    ax.set_title(title, fontsize=label_size)\n",
    "    \n",
    "    # Set tick label font size\n",
    "    ax.tick_params(axis='both', which='major', labelsize=ticklabel_size)\n",
    "    \n",
    "    if save_fig:\n",
    "        plt.savefig(f'{title.replace(\" \", \"_\")}_confusion_matrix.png', dpi=300, bbox_inches='tight')\n",
    "    \n",
    "    # Show the plot\n",
    "    plt.show()\n",
    "    \n",
    "# Get confusion matrix of all models\n",
    "# Predictions for each model\n",
    "y_pred_lr = mdl_lr.predict(X_test)\n",
    "y_pred_svm = mdl_svm.predict(X_test)\n",
    "y_pred_dt = mdl_dt.predict(X_test)\n",
    "y_pred_rf = mdl_rf.predict(X_test)\n",
    "\n",
    "# Confusion matrices for each model\n",
    "cm_lr = confusion_matrix(y_pred_lr, y_test)\n",
    "cm_svm = confusion_matrix(y_pred_svm, y_test)\n",
    "cm_dt = confusion_matrix(y_pred_dt, y_test)\n",
    "cm_rf = confusion_matrix(y_pred_rf, y_test)\n",
    "\n",
    "# Display the confusion matrices\n",
    "save_flag = False\n",
    "cm_disp(cm_lr, title='Logistic Regression', save_fig=save_flag)\n",
    "cm_disp(cm_svm, title='Support Vector Machine', save_fig=save_flag)\n",
    "cm_disp(cm_dt, title='Decision Tree', save_fig=save_flag)\n",
    "cm_disp(cm_rf, title='Random Forest', save_fig=save_flag)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Step 4: 计算各模型的准确率 Accuracy"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def getAccuracyFromCM(cm):\n",
    "    # Calculate total number of samples\n",
    "    total = np.sum(cm)\n",
    "    \n",
    "    # Calculate number of correct predictions (sum of diagonal elements)\n",
    "    correct = np.trace(cm)\n",
    "    \n",
    "    # Calculate accuracy\n",
    "    accuracy = correct / total\n",
    "    \n",
    "    return accuracy\n",
    "\n",
    "# Accuracy of each model\n",
    "acc_lr = getAccuracyFromCM(cm_lr)\n",
    "acc_svm = getAccuracyFromCM(cm_svm)\n",
    "acc_dt = getAccuracyFromCM(cm_dt)\n",
    "acc_rf = getAccuracyFromCM(cm_rf)\n",
    "\n",
    "# Print accuracies\n",
    "print(f\"Logistic Regression Accuracy: {acc_lr:.4f}\")\n",
    "print(f\"Support Vector Machine Accuracy: {acc_svm:.4f}\")\n",
    "print(f\"Decision Tree Accuracy: {acc_dt:.4f}\")\n",
    "print(f\"Random Forest Accuracy: {acc_rf:.4f}\")"
   ]
  }
 ],
 "metadata": {
  "colab": {
   "authorship_tag": "ABX9TyO5gS9/MePw+FDiXJA07L6y",
   "include_colab_link": true,
   "provenance": []
  },
  "kernelspec": {
   "display_name": "machinelearning",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.14"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
